/*
	Purpose: Using the sample of 1940 Census mothers aged
				30-50 (created in 0a), this file further restricts
				to black and white mothers. Average predicted mother
				income is calculated at various levels. Missing income
				is imputed at the occupation x race x south level. 

	Creates: All output files have the prefix 
				"avgincomes_mothers1940_"
*/

clear
set more off
cd "$Mydirectory1/1_DataSources/CensusData/"

use ./input/Census1940_full_raw_mothers30to50.dta, clear 

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

* Income clean-up: report how many exact zeros each income measure contains
	foreach inc of varlist incwage fam_income hh_income {
		count if `inc'==0
	}
	
/* A family or household total of 0 is recoded to "." only when the count of
   members with missing income equals the count of adults in the universe,
   i.e. the zero reflects that every component was 0 or missing. */
	replace fam_income=. if fam_income==0 & number_missing_inc_fam==number_adult_universe_fam
	replace hh_income=. if hh_income==0 & number_missing_inc_hh==number_adult_universe_HH
	
* Sample restriction: drop mothers whose own wage income is missing or zero,
* then verify that every remaining mother has a usable household income
	drop if incwage>=. | incwage==0
	assert !(hh_income==0 | hh_income==.)
	
/*
   Convert income variables to 1950 dollars using the CPI
   (index value 14 for 1940 and 24.1 for 1950).
   Source: https://www.minneapolisfed.org/about-us/monetary-policy/inflation-calculator/consumer-price-index-1913-

   fam_income is converted together with incwage and hh_income: all three
   are averaged and saved side by side later in this file, so they have to
   be expressed in the same dollars. Missing values stay missing.
*/
	gen CPI1940 = 14
	gen CPI1950= 24.1
	
	foreach var of varlist incwage fam_income hh_income {
		replace `var' = `var' * (CPI1950 / CPI1940)
	}
	
	* The CPI constants are only needed for the conversion above
	drop CPI1940 CPI1950
	
* Restrict race to white (code 100) and black (code 200), then rescale to 1/2.
* (The 30-50 age restriction is already built into the input sample.)
* An exact match is used instead of race<=200 so that no other code at or
* below 200 can survive the cut and become a zero or fractional race value
* after the division, which would not line up with the race 1/2 cells used below.
	keep if inlist(race, 100, 200) //race==1 | race==2 after rescaling
	replace race = race/100
	
/* Note: Per Census documentation, OCCSCORE is a constructed 
         2-digit numeric variable that assigns occupational 
         income scores to each occupation. OCCSCORE represents 
         the median total income (in hundreds of 1950 dollars) 
         of all persons with that particular occupation in 1950. 
*/
* Rescale the score from hundreds of dollars to dollars
	replace occscore = 100*occscore
	
* State identifier: tabulate it (missing values included), then give it the shorter name
	tabulate statefip, missing
	rename statefip fips
	
* Region variable: keep the incoming coding under a new name and rebuild
* a four-category region from the state FIPS code
	rename region region_og 

    * 1 = Northeast: Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, Vermont, New Jersey, New York, Pennsylvania
    * 2 = Midwest: Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas, Minnesota, Missouri, Nebraska, North Dakota, South Dakota
    /* 3 = South: Delaware, District of Columbia, Florida, Georgia, Maryland, North Carolina, South Carolina, Virginia, West Virginia, Alabama,
                  Kentucky, Mississippi, Tennessee, Arkansas, Louisiana, Oklahoma, Texas */
    * 4 = West: Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming, California, Oregon, Washington --note: Census puts AK and HI in with Pacific division

	gen region=.
	replace region=1 if inlist(fips, 9, 23, 25, 33, 44, 50, 34, 36, 42) //9
	replace region=2 if inlist(fips, 17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46) //12
	replace region=3 if inlist(fips, 10, 11, 12, 13, 24, 37, 45, 51, 54, ///
	                                 1, 21, 28, 47, 5, 22, 40, 48) //17
	replace region=4 if inlist(fips, 4, 8, 16, 30, 32, 35, 49, 56, 6, 41, 53, 2, 15) //13
	
	* Southern residence: 1 for region 3, 0 otherwise (including an unassigned region)
	gen south_merge = (region==3)
	
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************
**** ASSIGN COARSENED OCCS
****************

* Occupation codes of 980 and above are recoded to missing
	sort occ1950
	replace occ1950=. if occ1950>=980

* Flag the first observation within each occupation code and count the flags,
* i.e. the number of distinct occ1950 values in the 1940 data
* (missing, if present, forms its own group)
	by occ1950, sort: gen nvals = (_n==1)
	count if nvals==1 

* Occupations in the 200's are split by self-employment: codes 200-290
* with classwkr below 20 are shifted up by 1000
	replace occ1950=1000+occ1950 if inrange(occ1950, 200, 290) & classwkr<20
	
* Crosswalk Census occupations to coarsened ANES occupations.
* Every observation must find a crosswalk row; crosswalk rows that match
* nobody are discarded.
	merge m:1 occ1950 using ../Crosswalks/Crosswalk_1950Census_toANES.dta
	assert _merge!=1
	keep if _merge==3
	drop _merge
	
* Show which Census codes have no coarsened code, then remove them
	tab occ1950 if occ1950ej==., m
	keep if occ1950ej!=. 
	
* Person-level snapshot, reloaded later for the collapses at other levels
	tempfile fulldata 
	save `fulldata'
	
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

*******************
*** TEMPLATES
*******************

* Unit counter; summed in the collapse that follows the templates to give cell sizes
	gen number=1
	
* Template 1: occupation x race 
preserve

	* One row per occupation (the collapsed race value itself is overwritten below)
	collapse (min) race, by(occ1950ej)
	
	* Duplicate each occupation and number the two copies 1 and 2, so both
	* races are always present, even for an occupation in which the lowest
	* observed race code is 2
	expand 2
	bysort occ1950ej: replace race=_n

	tempfile occbyrace
	save `occbyrace'

restore 
	
* Template 2: race x south
preserve 
	
	* One row per observed value of south_merge, then both races for each
	collapse (min) race, by(south_merge)
	expand 2
	bysort south_merge: replace race=_n
	
	tempfile south
	save `south'
	
restore
	
* Template 3: occupation x race x south
preserve

	* Pair every occupation x race row with every south value of the same race
	use `occbyrace', clear
	joinby race using `south'
	
	* The template is merged 1:1 on these keys, so they must identify rows
	isid occ1950ej race south_merge
	
	tempfile template
	save `template'
	
restore

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

********************************************
*** COLLAPSE (OCCUPATION X RACE X SOUTH)
********************************************

* Cell size (unweighted sum of the unit counter) and mean incomes per cell
	collapse (rawsum) number  (mean)  incwage fam_income hh_income , by(occ1950ej race south_merge) 

	tempfile income
	save `income'

* Merge into template
	use `template', clear
	merge 1:1 occ1950ej race south_merge using `income'
	
	* Every observed cell must exist in the template; an unmatched cell would
	* add a third row to an occupation x south group and break the row-pair
	* logic used by the imputation step
	assert _merge!=2
	drop _merge
	
	* Template cells with no observations get a cell size of zero
	replace number=0 if number==.

* Count missings
	count if incwage==. | incwage==0 
	tab occ1950ej race if incwage==.
	tab south_merge if incwage==.
	
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************************
* IMPUTATIONS
****************************
	
	* Order rows so that, within each occupation x south pair, the race 1
	* (white) row comes directly before the race 2 (black) row
	sort occ1950ej south_merge race
	local min_obs "1"
	
	foreach incvar in incwage fam_income hh_income {
		
		* White/black income ratio, stored on the second row of each pair and
		* only where both cells have enough observations.
		* NOTE(review): the previous row is tested with >= and the current row
		* with >, so the current cell needs one observation more than the
		* previous one -- confirm that this asymmetry is intended.
		by occ1950ej south_merge: gen white_black_ratio = `incvar'[_n-1]/`incvar' if number>`min_obs' & number[_n-1]>=`min_obs'

		* Average ratio across southern occupations, weighted by the cell size
		* on the row that carries the ratio
		quietly summarize white_black_ratio if south_merge==1 [aw=number]
		local south_gap = r(mean)
		display `south_gap'
		
		* Southern race 2 cells with no observations: divide the white income
		* of the same occupation in the south by the average southern ratio
		levelsof occ1950ej if number==0 & south_merge==1 & race==2, local(empty_occs)
		foreach occ in `empty_occs' {
			display "occupation: `occ'"
			
			summarize `incvar' if occ1950ej==`occ' & south_merge==1 & race==1
			replace `incvar' = `r(mean)' / `south_gap' if occ1950ej==`occ' & south_merge==1 & race==2
		}
		
		* No cell may be left without a value for this income measure
		assert `incvar'!=.
		drop white_black_ratio 
	}
	

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

**********
* Save
**********

	* Final variable names for the cell-level file
	rename occ1950ej motheroccej
	rename number number_1940obs_byrace_bysouth
	rename incwage avgincwage_1940_byrace_bysouth
	rename fam_income avg_faminc_1940_byrace_bysouth
	rename hh_income avg_HHinc_1940_byrace_bysouth

	* Variable labels
	label variable race "Respondent race"
	label variable number_1940obs_byrace_bysouth "Number of obs in 1940 occ x race x region cell"
	label variable avgincwage_1940_byrace_bysouth "Coarse income score, incwage, by race and south"
	label variable avg_HHinc_1940_byrace_bysouth "Coarse income score, household income, by race and south"
	label variable avg_faminc_1940_byrace_bysouth "Coarse income score, family income, by race and south"

	save ./output/avgincomes_mothers1940_byrace_bysouth.dta, replace
	
	
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

*******************************
* COLLAPSES AT OTHER LEVELS
*******************************

	* Go back to the person-level data saved before the templates were built
	use `fulldata', clear 
	rename occ1950ej motheroccej

	* One output file per aggregation level; the level name is the file
	* suffix and picks the grouping variables
	foreach x in byocc byocc_byr byocc_bys byrace bysouth byr_bys	{
	
		if "`x'"=="byocc" {
			local cells "motheroccej"
		}
		else if "`x'"=="byocc_byr" {
			local cells "motheroccej race"
		}
		else if "`x'"=="byocc_bys" {
			local cells "motheroccej south_merge"
		}
		else if "`x'"=="byrace" {
			local cells "race"
		}
		else if "`x'"=="bysouth" {
			local cells "south_merge"
		}
		else if "`x'"=="byr_bys" {
			local cells "race south_merge"
		}

		* Mean household income within each cell, saved under a level-specific name
		preserve 
			collapse (mean) hh_income, by(`cells')
			
			label variable hh_income "Mother household income, 1940, `cells'"
			rename hh_income HHinc_1940_`x'
			
			save ./output/avgincomes_mothers1940_`x'.dta, replace
			
		restore 
	}



	
	
